How Does Attention Work?

Source: https://github.com/jessevig/bertviz

In [12]:
# One-time setup — uncomment to run. Prefer the %pip magic over plain pip so the
# install targets this notebook's kernel environment:
# %pip install bertviz
In [13]:
import torch

from bertviz import model_view
from transformers import AutoTokenizer, AutoModel, utils

utils.logging.set_verbosity_error()  # Suppress standard warnings
In [4]:
# Distilled BERT variant — per its name: 12 layers, hidden size 384, uncased.
# Small enough to download and run quickly on CPU.
model_name = "microsoft/xtremedistil-l12-h384-uncased"  # Find popular HuggingFace models here: https://huggingface.co/models
In [20]:
# Minimal one-word example to start with (the tokenizer will add special tokens around it).
input_text = "Hello"  
In [5]:
# Download/load pretrained weights. output_attentions=True makes each forward pass
# also return the per-layer attention weights, which bertviz needs.
model = AutoModel.from_pretrained(model_name, output_attentions=True)  # Configure model to return attention values
Downloading:   0%|          | 0.00/527 [00:00<?, ?B/s]
C:\Users\aklof\AppData\Roaming\Python\Python39\site-packages\huggingface_hub\file_download.py:125: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\Users\aklof\.cache\huggingface\hub. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
  warnings.warn(message)
Downloading:   0%|          | 0.00/134M [00:00<?, ?B/s]
In [6]:
# Load the tokenizer matching the model so token ids line up with the pretrained vocabulary.
tokenizer = AutoTokenizer.from_pretrained(model_name)
Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]
In [26]:
# Prepare inputs and compute attention:
inputs = tokenizer.encode(input_text, return_tensors='pt')  # Tokenize input text (tensor of token ids, incl. special tokens)

# No gradients are needed for visualization — torch.no_grad() avoids building
# the autograd graph, saving memory and compute during the forward pass.
with torch.no_grad():
    outputs = model(inputs)  # Run model

# Use the named field instead of positional indexing (outputs[-1]): the output
# object's tuple ordering is not guaranteed, but .attentions is (one tensor per layer,
# populated because the model was loaded with output_attentions=True).
attention = outputs.attentions  # Retrieve attention from model outputs

tokens = tokenizer.convert_ids_to_tokens(inputs[0])  # Convert input ids to token strings
In [27]:
# Render bertviz's interactive visualization of the attention weights for every
# layer/head over these tokens, inline in the notebook output.
model_view(attention, tokens)  # Display model view
In [28]:
# A short multi-word example, to see token-to-token attention within a sentence.
input_text = "My name is Aleksandra"  
In [29]:
# Tokenize and run the model on the new input_text, collecting attention weights.
inputs = tokenizer.encode(input_text, return_tensors='pt')  # Tokenize input text

# Inference only — disable gradient tracking to save memory and compute.
with torch.no_grad():
    outputs = model(inputs)  # Run model

# Prefer the named .attentions field over positional outputs[-1]; it is explicit
# and robust to changes in the output object's ordering.
attention = outputs.attentions  # Retrieve attention from model outputs

tokens = tokenizer.convert_ids_to_tokens(inputs[0])  # Convert input ids to token strings
In [30]:
# Visualize attention for the multi-word sentence.
model_view(attention, tokens)  # Display model view
In [31]:
# A much longer, realistic input (news headline + lede) to see attention patterns
# over many tokens. NOTE: string left verbatim — it is runtime data fed to the tokenizer.
input_text = "Biden Visits Kyiv, Ukraine’s Embattled Capital, as Air-Raid Siren Sounds President Biden took a nearly 10-hour train ride from the border of Poland to show his administration’s unwavering support nearly a year into Russia invasion."
In [32]:
# Tokenize and run the model on the long input, collecting attention weights.
inputs = tokenizer.encode(input_text, return_tensors='pt')  # Tokenize input text

# Inference only — disable gradient tracking to save memory and compute.
with torch.no_grad():
    outputs = model(inputs)  # Run model

# Prefer the named .attentions field over positional outputs[-1]; it is explicit
# and robust to changes in the output object's ordering.
attention = outputs.attentions  # Retrieve attention from model outputs

tokens = tokenizer.convert_ids_to_tokens(inputs[0])  # Convert input ids to token strings
In [33]:
# Visualize attention for the long news-text input (many more tokens per head).
model_view(attention, tokens)  # Display model view
In [ ]: